(*
 * Copyright (c) 2003 Dustin Sallings <dustin@spy.net>
 * Copyright (C) The #Seppo contributors. All rights reserved.
 * 
 * based on
 * https://github.com/dustin/snippets/blob/master/ocaml/lib/cdb.ml
 *)

(**
 * CDB Implementation.  http://cr.yp.to/cdb/cdb.txt
*)

(* The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting
   hash of 5381.
*)

(** http://cr.yp.to/cdb/cdb.txt *)
module H = struct
  let _5381  = 5381 |> Optint.of_int
  let _0xFFffFFff = 0xFFffFFffL |> Optint.of_int64
  let ( + )    = Optint.add
  let ( << )   = Optint.shift_left
  let ( ^ )    = Optint.logxor
  let ( land ) = Optint.logand

  let foldr h c =
    let c = c |> Char.code |> Optint.of_int in
    (((h << 5) + h) ^ c) land _0xFFffFFff
end

let hash byt =
  byt |> Bytes.fold_left H.foldr H._5381 |> Optint.to_int32

let _0xff = Int32.of_int 0xff

let wri4b_le oc (byt : int -> int) =
  let wri idx = byt idx |> output_byte oc in
  wri 0;
  wri 1;
  wri 2;
  wri 3

let write_le oc (i32 : int) =
  wri4b_le oc (fun byt -> (i32 lsr (byt * 8)) land 0xff)

let write_le32 oc (i32 : int32) =
  wri4b_le oc (fun byt ->
      Int32.(to_int (logand (shift_right_logical i32 (byt * 8)) _0xff)))

type cdb_creator = {
  table_count : int array;
  (* Hash index pointers *)
  mutable pointers : (int32 * int32) list;
  out : out_channel;
}

let cdb_creator_of_out_channel out_channel : cdb_creator =
  let cdb =
    { table_count = Array.make 256 0; pointers = []; out = out_channel  }
  in
  (* Skip over the header *)
  seek_out cdb.out 2048;
  cdb

let open_out (fn : string) : cdb_creator =
  fn |> open_out_bin |> cdb_creator_of_out_channel

let hash_to_table h = Int32.(to_int (logand h _0xff))

let hash_to_bucket h len =
  Int32.(rem (shift_right_logical h 8) (of_int len) |> to_int)

let pos_out_32 x = x |> LargeFile.pos_out |> Int64.to_int32

let add cdc k v =
  (* Add the hash to the list *)
  let h = hash k in
  cdc.pointers <- (h, pos_out_32 cdc.out) :: cdc.pointers;
  let table = hash_to_table h in
  cdc.table_count.(table) <- succ cdc.table_count.(table);
  (* Add the data to the file *)
  write_le cdc.out (Bytes.length k);
  write_le cdc.out (Bytes.length v);
  output_bytes cdc.out k;
  output_bytes cdc.out v

(** Write one complete hash table *)
let process_table oc table_start slot_table slot_pointers i tc =
  (* Length of the table
   * https://github.com/howerj/cdb#:~:text=The%20number%20of%20buckets%20in%20the%20hash%20table%20is%20chosen%20as%20twice%20the%20number%20of%20populated%20entries%20in%20the%20hash%20table.
  *)
  let len = tc * 2 in
  (* Store the table position *)
  slot_table := (pos_out_32 oc, Int32.of_int len) :: !slot_table;
  (* Build the hash table *)
  let ht = Array.make len None in
  let cur_p = ref table_start.(i) in
  let lookup_slot n =
    try Hashtbl.find slot_pointers n
    with Not_found -> Int32.(zero, zero)
  in
  for _ = 0 to pred tc do
    let hp = lookup_slot !cur_p in
    cur_p := succ !cur_p;

    (* Find an available hash bucket *)
    let rec find_slot where =
      match ht.(where) with
      | None -> where
      | Some _ -> if succ where = len then find_slot 0 else find_slot (succ where)
    in
    let where = find_slot (hash_to_bucket (fst hp) len) in
    ht.(where) <- Some hp
  done;
  (* Write this hash table *)
  Array.iter
    (fun hpp ->
       let h,t = match hpp with
         | None -> Int32.(zero, zero)
         | Some x -> x
       in
       write_le32 oc h;
       write_le32 oc t )
    ht

let close_cdb_out cdc =
  let cur_entry = ref 0 in
  let table_start = Array.make 256 0 in
  (* Find all the hash starts *)
  Array.iteri
    (fun i x ->
       cur_entry := !cur_entry + x;
       table_start.(i) <- !cur_entry)
    cdc.table_count;
  (* Build out the slot pointers hash *)
  let slot_pointers = Hashtbl.create (List.length cdc.pointers) in
  (* Fill in the slot pointers *)
  List.iter
    (fun ((h,_) as hp) ->
       let table = hash_to_table h in
       table_start.(table) <- pred table_start.(table);
       Hashtbl.replace slot_pointers table_start.(table) hp)
    cdc.pointers;
  (* Write the shit out *)
  let slot_table = ref [] in
  (* Write out all hash tables *)
  Array.iteri
    (process_table cdc.out table_start slot_table slot_pointers)
    cdc.table_count;
  (* write out the pointer sets *)
  seek_out cdc.out 0;
  List.iter
    (fun (po,le) ->
       write_le32 cdc.out po;
       write_le32 cdc.out le)
    (List.rev !slot_table);
  close_out cdc.out

(** {1 Iterating a cdb file} *)

(* read a little-endian integer *)
let read_le f =
  let a = input_byte f in
  let b = input_byte f in
  let c = input_byte f in
  let d = input_byte f in
  a lor (b lsl 8) lor (c lsl 16) lor (d lsl 24)

(* Int32 version of read_le *)
let read_le32 f =
  let a = input_byte f in
  let b = input_byte f in
  let c = input_byte f in
  let d = input_byte f in
  Int32.(logor
           (of_int (a lor (b lsl 8) lor (c lsl 16)))
           (shift_left (of_int d) 24))

let iter (f : bytes * bytes -> bool) (fn : string) : unit =
  let fin = open_in_bin fn in
  try
    (* Figure out where the end of all data is *)
    let eod = read_le32 fin in
    (* Seek to the record section *)
    seek_in fin 2048;
    let rec loop () =
      (* (pos_in fin) < eod *)
      if Int32.compare (Int64.to_int32 (LargeFile.pos_in fin)) eod < 0 then (
        let klen = read_le fin in
        let dlen = read_le fin in
        let key = Bytes.create klen in
        let data = Bytes.create dlen in
        really_input fin key 0 klen;
        really_input fin data 0 dlen;
        if f (key, data) then loop ())
    in
    loop ();
    close_in fin
  with x ->
    close_in fin;
    raise x

type cdb_file = {
  f : in_channel;
  (* Position * length *)
  tables : (int32 * int) array;
}

let open_cdb_in (fn : string) : cdb_file =
  let fin = open_in_bin fn in
  let tables = Array.make 256 (Int32.zero, 0) in
  (* Set the positions and lengths *)
  Array.iteri
    (fun i _ ->
       let pos = read_le32 fin in
       let len = read_le fin in
       tables.(i) <- (pos, len))
    tables;
  { f = fin; tables }

let close_cdb_in cdf = close_in cdf.f

let find_all (cdf : cdb_file) (key : bytes) : bytes Stream.t =
  let kh = key |> hash in
  (* Find out where the hash table is *)
  let hpos, hlen = cdf.tables.(hash_to_table kh)
  and fd = cdf.f in
  let rec loop x =
    if x >= hlen
    then None
    else
      (* Calculate the slot containing these entries *)
      let lslot = (hash_to_bucket kh hlen + x) mod hlen in
      let spos = Int32.add (Int32.of_int (lslot * 8)) hpos in
      LargeFile.seek_in fd (Int64.of_int32 spos);
      let h = read_le32 fd in
      let pos = read_le32 fd in
      (* validate that we a real bucket *)
      if h = kh && Int32.(compare pos zero) > 0
      then (
        LargeFile.seek_in fd (Int64.of_int32 pos);
        let klen = read_le fd in
        if klen = Bytes.length key
        then (
          let dlen = read_le fd in
          let rkey = Bytes.create klen in
          really_input fd rkey 0 klen;
          if rkey = key
          then (
            let rdata = Bytes.create dlen in
            really_input fd rdata 0 dlen;
            Some rdata)
          else x |> succ |> loop)
        else x |> succ |> loop)
      else x |> succ |> loop
  in
  Stream.from loop

let find_first cdf key =
  try Some (key |> find_all cdf |> Stream.next)
  with Stream.Failure -> None
